In [1]:
# The three entity types a referent set can be built from.
entities = {'self', 'addressee', 'other'}

1 entity referent

  • self ("me")
  • addressee ("you here")
  • other ("somebody else")

2+ entity referent

  • self, addressee ("me and you here" / inclusive we)
  • self, other ("me and somebody else" / exclusive we)
  • addressee, addressee ("the two or more of you here")
  • addressee, other ("one of you here and somebody else")
  • other, other ("the two or more of them")

3+ entity referent

  • self, addressee, addressee ("me and the two or more of you here")
  • self, addressee, other ("me, one of you here, and somebody else")
  • self, other, other ("me and two or more other people")
  • addressee, addressee, other ("the two or more of you and somebody else")
  • addressee, other, other ("one of you and two or more other people")

4+ entity referent

  • self, addressee, addressee, other ("me, the two or more of you here, and somebody else")
  • self, addressee, other, other ("me, one of you here, and two or more other people")
  • addressee, addressee, other, other ("the two or more of you here and two or more other people")

5+ entity referent

  • self, addressee, addressee, other, other ("me, the two or more of you here, and two or more other people")

There are 17 possible markers if there's no distinction between 2 entities of the same type and 3+ entities of the same type.

  • a dual or trial entity number could be added to have a 3-way distinction between e.g. [other, other] and [other, other, other]
  • another entity category besides self, addressee, and other could be added (invisible/divine entities)
  • multiple self referents could be included (choral we)

Also, what about the issue of mis-identifying the cue as "self" rather than "addressee" (kids calling themselves "you")?


In [2]:
from itertools import combinations, combinations_with_replacement

# Enumerate every referent set the marker system has to distinguish.
#
# A referent set is a multiset of entity types, kept only if it has
#   * at most one 'self'       (no "choral we"),
#   * at most two 'addressee'  (two stands for "two or more"),
#   * at most two 'other'      (two stands for "two or more").
# Each kept set is stored as a list of cues: the individual entity names
# followed by every distinct sub-combination (tuple) of two or more of them.

# `entities` is a set, so its iteration order is arbitrary and can change
# between interpreters/runs; iterating it directly would reshuffle `referents`
# and every table derived from it. Use a fixed order instead (the one the
# recorded outputs show); names not listed sort alphabetically after these.
entity_rank = {'addressee': 0, 'self': 1, 'other': 2}
ordered_entities = sorted(
    entities,
    key=lambda name: (entity_rank.get(name, len(entity_rank)), name)
)

referents = []

# Sizes 1 .. 2 * len(entities) - 1. `range` (not the Python-2-only `xrange`)
# keeps the cell runnable on both Python 2 and Python 3.
for i in range(1, len(entities) * 2):
    for combo in combinations_with_replacement(ordered_entities, i):

        # choral we is impossible
        if combo.count('self') > 1:
            continue

        # only singular vs plural
        if combo.count('addressee') > 2 or combo.count('other') > 2:
            continue

        # compound cues: append each sub-combination once, smallest first
        referent = list(combo)
        for j in range(2, len(combo) + 1):
            for compound in combinations(combo, j):
                if compound not in referent:
                    referent.append(compound)

        referents.append(referent)

In [3]:
# Number of distinct referent sets generated by the loop.
len(referents)


Out[3]:
17

In [4]:
# Display each referent set: the single-entity cues followed by their compound (tuple) cues.
referents


Out[4]:
[['addressee'],
 ['self'],
 ['other'],
 ['addressee', 'addressee', ('addressee', 'addressee')],
 ['addressee', 'self', ('addressee', 'self')],
 ['addressee', 'other', ('addressee', 'other')],
 ['self', 'other', ('self', 'other')],
 ['other', 'other', ('other', 'other')],
 ['addressee',
  'addressee',
  'self',
  ('addressee', 'addressee'),
  ('addressee', 'self'),
  ('addressee', 'addressee', 'self')],
 ['addressee',
  'addressee',
  'other',
  ('addressee', 'addressee'),
  ('addressee', 'other'),
  ('addressee', 'addressee', 'other')],
 ['addressee',
  'self',
  'other',
  ('addressee', 'self'),
  ('addressee', 'other'),
  ('self', 'other'),
  ('addressee', 'self', 'other')],
 ['addressee',
  'other',
  'other',
  ('addressee', 'other'),
  ('other', 'other'),
  ('addressee', 'other', 'other')],
 ['self',
  'other',
  'other',
  ('self', 'other'),
  ('other', 'other'),
  ('self', 'other', 'other')],
 ['addressee',
  'addressee',
  'self',
  'other',
  ('addressee', 'addressee'),
  ('addressee', 'self'),
  ('addressee', 'other'),
  ('self', 'other'),
  ('addressee', 'addressee', 'self'),
  ('addressee', 'addressee', 'other'),
  ('addressee', 'self', 'other'),
  ('addressee', 'addressee', 'self', 'other')],
 ['addressee',
  'addressee',
  'other',
  'other',
  ('addressee', 'addressee'),
  ('addressee', 'other'),
  ('other', 'other'),
  ('addressee', 'addressee', 'other'),
  ('addressee', 'other', 'other'),
  ('addressee', 'addressee', 'other', 'other')],
 ['addressee',
  'self',
  'other',
  'other',
  ('addressee', 'self'),
  ('addressee', 'other'),
  ('self', 'other'),
  ('other', 'other'),
  ('addressee', 'self', 'other'),
  ('addressee', 'other', 'other'),
  ('self', 'other', 'other'),
  ('addressee', 'self', 'other', 'other')],
 ['addressee',
  'addressee',
  'self',
  'other',
  'other',
  ('addressee', 'addressee'),
  ('addressee', 'self'),
  ('addressee', 'other'),
  ('self', 'other'),
  ('other', 'other'),
  ('addressee', 'addressee', 'self'),
  ('addressee', 'addressee', 'other'),
  ('addressee', 'self', 'other'),
  ('addressee', 'other', 'other'),
  ('self', 'other', 'other'),
  ('addressee', 'addressee', 'self', 'other'),
  ('addressee', 'addressee', 'other', 'other'),
  ('addressee', 'self', 'other', 'other'),
  ('addressee', 'addressee', 'self', 'other', 'other')]]

Spoken English collapses these to 6 possibilities: I, you, s/he, we, you guys, they


In [5]:
def english(referents):
    """Return the spoken-English pronoun used for a collection of referent cues.

    Person is resolved by precedence: the speaker outranks the addressee,
    who outranks anybody else.

    * 'self' present      -> 'we' if an addressee or an other is also
                             present, otherwise 'I'
    * 'addressee' present -> 'you guys' if there is more than one addressee
                             or an other is also present, otherwise 'you'
    * 'other' present     -> 'they' if there is more than one other,
                             otherwise 's/he'

    Returns None when none of the three entity names occurs in `referents`.
    """
    # First person: inclusive and exclusive "we" are not distinguished.
    if 'self' in referents:
        has_company = 'addressee' in referents or 'other' in referents
        return 'we' if has_company else 'I'

    # Second person (speaker not included).
    if 'addressee' in referents:
        is_plural = referents.count('addressee') > 1 or 'other' in referents
        return 'you guys' if is_plural else 'you'

    # Third person (neither speaker nor addressee included).
    if 'other' in referents:
        return 'they' if referents.count('other') > 1 else 's/he'

    return None

In [6]:
# Spot check: speaker plus addressee (inclusive we).
english(['self', 'addressee'])


Out[6]:
'we'

In [7]:
# Spot check: speaker plus somebody else (exclusive we).
english(['self', 'other'])


Out[7]:
'we'

In [8]:
# Spot check: one addressee plus somebody else, speaker excluded.
english(['addressee', 'other'])


Out[8]:
'you guys'

In [9]:
# Spot check: two or more addressees, speaker excluded.
english(['addressee', 'addressee']) # also ('addressee', 'addressee') compound


Out[9]:
'you guys'

In [10]:
import pandas

# One row per referent set: 'Cues' holds the cue list for the set and
# 'Outcomes' the English pronoun that english() assigns to it.
data = pandas.DataFrame()

data['Cues'] = referents
data['Outcomes'] = [english(referent) for referent in referents]
data


Out[10]:
Cues Outcomes
0 [addressee] you
1 [self] I
2 [other] s/he
3 [addressee, addressee, (addressee, addressee)] you guys
4 [addressee, self, (addressee, self)] we
5 [addressee, other, (addressee, other)] you guys
6 [self, other, (self, other)] we
7 [other, other, (other, other)] they
8 [addressee, addressee, self, (addressee, addre... we
9 [addressee, addressee, other, (addressee, addr... you guys
10 [addressee, self, other, (addressee, self), (a... we
11 [addressee, other, other, (addressee, other), ... you guys
12 [self, other, other, (self, other), (other, ot... we
13 [addressee, addressee, self, other, (addressee... we
14 [addressee, addressee, other, other, (addresse... you guys
15 [addressee, self, other, other, (addressee, se... we
16 [addressee, addressee, self, other, other, (ad... we

17 rows × 2 columns

Assume that the distribution of referent sets is uniform, which is probably not true.


In [11]:
import numpy

def sampler(p, seed=None):
    """Build a zero-argument function that draws uniformly with numpy's ``choice``.

    Parameters
    ----------
    p : int or 1-D array-like
        Passed unchanged to ``choice``: an int draws from ``range(p)``,
        an array-like draws one of its elements.
    seed : int or None, optional
        When given, draws come from a private ``numpy.random.RandomState(seed)``,
        so the sequence is reproducible and independent of numpy's global
        random state. When None (the default) the global
        ``numpy.random.choice`` is used, exactly as before.

    Returns
    -------
    callable
        ``uniform()``, returning one draw per call.
    """
    # A private generator is created only on request so that un-seeded callers
    # keep sharing (and can still seed) numpy's global state.
    rng = None if seed is None else numpy.random.RandomState(seed)

    def uniform():
        if rng is None:
            return numpy.random.choice(p)
        return rng.choice(p)

    return uniform

# Uniform draw over the integers 0 .. len(data) - 1 (numpy's choice on an int n samples from range(n)).
referent_sampler = sampler(len(data))

In [12]:
import ndl

def activation(W):
    """Tabulate ``ndl.activation`` for every cue set in the global ``data`` frame.

    Calls ``ndl.activation(cues, W)`` once per entry of ``data.Cues`` and
    stacks the results into a DataFrame that shares ``data``'s index.

    NOTE(review): `W` is presumably the weights object returned by ``ndl.rw``
    -- confirm against the ndl module.
    """
    per_cue_set = []
    for cues in data.Cues:
        per_cue_set.append(ndl.activation(cues, W))
    return pandas.DataFrame(per_cue_set, index=data.index)

In [13]:
# Learn weights with ndl.rw, drawing referent sets through referent_sampler,
# then tabulate the resulting activations for every cue set.
# NOTE(review): M=100 is presumably the number of learning trials (the text
# below speaks of "100 trials") -- confirm against ndl.rw.
W = ndl.rw(data, M=100, distribution=referent_sampler)
A = activation(W)
A


Out[13]:
I s/he they we you you guys
0 -0.000848 -0.004426 -0.010796 0.094586 0.051966 0.115364
1 0.038021 -0.004950 -0.012859 0.244965 -0.003339 -0.044676
2 -0.000927 0.055316 0.066743 0.114067 -0.002913 0.074110
3 -0.001350 -0.006548 -0.015506 0.147016 0.048047 0.193614
4 0.036280 -0.010656 -0.027279 0.482914 0.044615 0.028538
5 -0.002198 0.044708 0.041887 0.229084 0.045385 0.275810
6 0.036122 0.043941 0.038201 0.528713 -0.007516 -0.007599
7 -0.001687 0.048333 0.142420 0.160588 -0.004411 0.100827
8 0.035268 -0.013435 -0.033228 0.616866 0.038504 0.078863
9 -0.002760 0.039354 0.030369 0.304269 0.039766 0.394813
10 0.033516 0.029490 0.003721 0.845571 0.034954 0.119107
11 -0.003384 0.035624 0.107613 0.277364 0.041903 0.337897
12 0.034578 0.034353 0.102137 0.654354 -0.009695 -0.001124
13 0.032382 0.021968 -0.011912 1.048176 0.026274 0.188487
14 -0.004007 0.029325 0.091090 0.359332 0.035411 0.474638
15 0.031102 0.017140 0.053163 0.995321 0.029755 0.143911
16 0.029846 0.008438 0.030560 1.225235 0.019831 0.220925

17 rows × 6 columns


In [14]:
# Side-by-side table per referent set: the true pronoun, the outcome column
# with the highest activation in that row (idxmax along axis 1), and whether
# the two agree.
pandas.DataFrame([data['Outcomes'], A.idxmax(1), A.idxmax(1) == data['Outcomes']], 
                 index = ['Truth', 'Prediction', 'Accurate?']).T


Out[14]:
Truth Prediction Accurate?
0 you you guys False
1 I we False
2 s/he we False
3 you guys you guys True
4 we we True
5 you guys you guys True
6 we we True
7 they we False
8 we we True
9 you guys you guys True
10 we we True
11 you guys you guys True
12 we we True
13 we we True
14 you guys you guys True
15 we we True
16 we we True

17 rows × 3 columns

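With 100 trials, the learner gets a lot of them right (13 of the 17 referent sets in the run above), but only by predicting 'we' or 'you guys' all of the time, since those two outcomes cover most of the referent sets (13 of 17). It predicts 'we' whenever self is a referent, but also for the other-only sets, so every set whose correct form is 'I', 'you', 's/he', or 'they' is missed.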


In [15]:
import sim

In [16]:
# Simulation of learning the English system over the referent sets in `data`.
# NOTE(review): 2000 is presumably the maximum number of trials (cf. the
# MAX_M attribute read when plotting) -- confirm against sim.Simulation.
english_learning = sim.Simulation(english, data, referent_sampler, 2000)

In [17]:
import matplotlib.pyplot as plt
%matplotlib inline

In [18]:
# Value of english_learning.accuracy(i) for i = 1 .. MAX_M - 1, kept as a plain
# list so it can be handed to R later. `range` replaces the Python-2-only
# `xrange` so the cell runs on both Python 2 and Python 3.
trajectory = [english_learning.accuracy(i) for i in range(1, english_learning.MAX_M)]

plt.plot(range(1, len(trajectory) + 1), trajectory, '-')
plt.xlabel('Trial Number')
# Label the y-axis so the figure stands alone; the trailing ';' keeps the
# Text object's repr out of the cell output.
plt.ylabel('Accuracy');


Out[18]:
<matplotlib.text.Text at 0xf6fb198>

In [19]:
# Load rpy2's IPython extension (provides the %R / %%R / %Rpush magics),
# then copy the Python variable `trajectory` into the R session.
%load_ext rpy2.ipython

%Rpush trajectory

In [20]:
%%R

# Wrap the pushed `trajectory` in a data frame: `trial` numbers the entries
# from 1, `learned` holds the value recorded for that trial.
trajectory = data.frame(trial=1:length(trajectory), learned=trajectory)

library('ggplot2')

# Semi-transparent points for the per-trial values plus a smoothed trend line;
# coord_cartesian fixes the visible y-range to [0, 1].
ggplot(trajectory, aes(trial, learned)) + 
    geom_point(alpha=0.25) + 
    stat_smooth() +
    coord_cartesian(ylim=c(0,1))



In [ ]: